* This program creates the key dataset for follow-up women data that gets used for subsequent regressions.

* Project root; every data path below is built from this local.
local folder "C:\Users\mlevere\OneDrive - Mathematica\Documents\Projects\Nepal\"

clear all
set more off

* Start from the follow-up women file and attach the matching follow-up
* household record (only women whose household record is found are kept).
use "`folder'/Data/follow_up_raw_women", clear
merge m:1 www hh sn using "`folder'/Data/follow_up_raw_hh", keep(match) nogenerate

* Identifiers and treatment-assignment variables keep their names. Every other
* follow-up variable receives an _e suffix so that baseline variables can be
* merged in later without name clashes.
local norenamevars "district vdc vdc_code treatment cash_control info_control vdc_name www hh sn q00_001a control_vdc info_vdc cash_vdc"

quietly ds `norenamevars', not
local followup_vars "`r(varlist)'"
foreach v of local followup_vars {
    rename `v' `v'_e
}

* q00_001a becomes idc, the name used as a merge key against the baseline data.
rename q00_001a idc

* Park the renamed follow-up panel in a temporary file for the merges below.
tempfile women_data_panel
save "`women_data_panel'", replace

// Restrict the panel to women whose household appears in the baseline
// household file (i.e. households first seen at follow-up are excluded).
// Only the household identifiers are taken from baseline at this point.
use "`folder'/Data/baseline_raw_hh.dta", clear
keep www hh district vdc vdc_code

merge 1:m www hh district vdc vdc_code using "`women_data_panel'", keep(match) nogenerate

// Overwrite the temporary panel with the restricted version.
save "`women_data_panel'", replace

// Build the baseline covariates: baseline women joined to their baseline household.
use "`folder'/Data/baseline_raw_women.dta", clear
merge m:1 www hh using "`folder'/Data/baseline_raw_hh.dta", keep(match) nogenerate

// Baseline characteristics carried into the panel (renamed with a _b suffix below).
local kvars "age_years school woman_numkids hh_head_age num_hh_members hh_head_male reg_electricity roof separate_kitchen woman_weight woman_height rooms stories hh_land farmland"

// Identifier and treatment variables; these are the merge keys and are not renamed.
local norenamevars "www hh idc district vdc vdc_code treatment cash_control info_control vdc_name control_vdc info_vdc cash_vdc"

keep `norenamevars' `kvars'

// Suffix the retained baseline characteristics with _b.
foreach bvar of local kvars {
    rename `bvar' `bvar'_b
}

// Attach baseline characteristics to the follow-up women panel. Every follow-up
// woman is retained; baseline-only records are discarded. The two indicators
// record whether a baseline match was found.
merge 1:m `norenamevars' using "`women_data_panel'", keep(using match)
gen has_baseline_data = (_merge == 3)
gen no_baseline_data = (_merge == 2)
drop _merge

/* Flag VDCs by wave. No observations are dropped by this merge: endline_wave
   is 1 when the district/vdc_name pair is NOT found in wave1_vdcs.dta. */
merge m:1 district vdc_name using "`folder'/Data/wave1_vdcs.dta", keep(master match)
gen endline_wave = (_merge == 1)
drop _merge

// Discard records with no answer to the first-milk knowledge question.
drop if knowledge_firstmilk_e == .

/* Generate variables that will be used in analysis */

* Baseline age of the woman grouped into bands coded 20 (<=20), 25 (21-25),
* 30 (26-30) and 35 (31+). Stata treats missing as larger than any number, so
* the open-ended top band must exclude missing explicitly; otherwise women with
* no baseline age would be classified as 31+. With the guard, the bucket and the
* dummies created by -tab, gen()- are missing when age_years_b is missing.
gen woman_tot_buckets_b = age_years_b
replace woman_tot_buckets_b = 20 if age_years_b <= 20
replace woman_tot_buckets_b = 25 if age_years_b >= 21 & age_years_b <= 25
replace woman_tot_buckets_b = 30 if age_years_b >= 26 & age_years_b <= 30
replace woman_tot_buckets_b = 35 if age_years_b >= 31 & !missing(age_years_b)
tab woman_tot_buckets_b, gen(woman_age_b)

* Recode 2 to 0 so the variables are 0/1.
replace school_b = 0 if school_b == 2

replace attend_co_meeting_e = 0 if attend_co_meeting_e == 2

replace reg_electricity_b = 0 if reg_electricity_b == 2
* Indicator for roof code 6. Evaluates to 0 (not missing) when roof_b is missing.
gen stone_roof_b = roof_b == 6
replace separate_kitchen_b = 0 if separate_kitchen_b == 2


* Household head's baseline age in decades, bottom-coded at 2 (under 20) and
* top-coded at 6 (70 and over). As above, the top code excludes missing ages so
* they remain missing instead of being counted as 70+.
gen hh_head_tot_buckets_b = floor(hh_head_age_b/10)
replace hh_head_tot_buckets_b = 2 if hh_head_age_b < 20
replace hh_head_tot_buckets_b = 6 if hh_head_age_b >= 70 & !missing(hh_head_age_b)
tab hh_head_tot_buckets_b, gen(hh_head_age_b)

* Recode 2 to 0 on the follow-up items about the youngest child.
foreach yn of varlist youngest_tt_injection_e youngest_irontab_e youngest_vitaminA_e youngest_3days_nonbreastmilk_e {
    replace `yn' = 0 if `yn' == 2
}

* Knowledge indicators: 1 when the response falls in the listed code or range,
* 0 otherwise (including when the response is missing).
gen exclusive_6mo_e = (knowledge_exclusivebreastfeed_e == 2)

gen preg_more_e = (knowledge_pregnant_food_e == 1)

gen diarrhea_more_e = (knowledge_breastfeed_diahrrea_e == 3)

gen givebaby_firstmilk_e = (knowledge_firstmilk_e == 1)

gen feed_6t11_e = inrange(knowledge_6t11_fed_e, 2, 4)

gen snacks_12t24_e = inrange(knowledge_12t24_snacks_e, 4, 6)

gen iron_3t4_e = inlist(knowledge_iron_supp_e, 3, 4)

gen deworming_2ndtri_e = inrange(knowledge_deworming_e, 4, 7)

gen vitaminA_45_e = (knowledge_vitaminA_e <= 45)

gen anc_checkups_4_e = (knowledge_anc_checkups_e == 4)

* Unweighted count of the ten knowledge indicators above.
gen knowledge_index = givebaby_firstmilk_e + exclusive_6mo_e + feed_6t11_e ///
    + snacks_12t24_e + diarrhea_more_e + iron_3t4_e + deworming_2ndtri_e ///
    + vitaminA_45_e + preg_more_e + anc_checkups_4_e

* Practice indicators for the youngest child.
* exclusive_breastfed_6mos: response of 6, or response 99 with child aged <= 6 months.
gen exclusive_breastfed_6mos = (youngest_breastfed_exclusive_e == 6) ///
    | (youngest_breastfed_exclusive_e == 99 & youngest_age_months_e <= 6)
gen first_breastfed_0hr = (youngest_breastfed_first_e == 0)
gen youngest_irontab_5t6mos = inrange(youngest_irontab_num_e, 150, 210)


* One indicator per food that equals 1 when the goodfoods_kid item is coded 1,
* then a row count across those indicators.
foreach item in porridge_ue porridge_e vegetables fruits eggs milk meat lentils {
    gen mention_kid_`item' = (goodfoods_kid_`item'_e == 1)
}
egen mention_kids_items = rowtotal(mention_kid*)


/* Food Groups */
* Each group indicator is 1 when any of its past-24-hours items is coded 1.
* The seven indicators must be created consecutively, in this order: the row
* total below addresses them with the positional range dairy-nuts.
gen dairy = youngest_past24hrs_formula_e == 1 ///
    | youngest_past24hrs_animalmilk_e == 1 ///
    | youngest_past24hrs_yogurt_e == 1

gen grain = youngest_past24hrs_fortified_e == 1 ///
    | youngest_past24hrs_porridge_e == 1 ///
    | youngest_past24hrs_rice_grains_e == 1 ///
    | youngest_past24hrs_potatoes_e == 1

gen vitA_veg = youngest_past24hrs_yo_veg_e == 1 ///
    | youngest_past24hrs_yo_fruits_e == 1 ///
    | youngest_past24hrs_leafy_veg_e == 1

gen oth_veg = (youngest_past24hrs_oth_fruit_e == 1)

gen eggs = (youngest_past24hrs_eggs_e == 1)

gen meat = youngest_past24hrs_meat_e == 1 ///
    | youngest_past24hrs_organ_meat_e == 1 ///
    | youngest_past24hrs_fish_e == 1 ///
    | youngest_past24hrs_snails_e == 1

gen nuts = (youngest_past24hrs_lentils_e == 1)

* Number of food groups consumed. Set to missing unless
* youngest_over24months_e equals 1, and also when the exclusive-breastfeeding
* item is coded 99.
egen foodgroups = rowtotal(dairy-nuts)
replace foodgroups = . if youngest_over24months_e != 1 | youngest_breastfed_exclusive_e == 99

/* Recode 2 to 0 on some of the newer variables */
* NOTE(review): these names carry no _e suffix although the follow-up variables
* were suffixed earlier in this file; the loop appears to rely on Stata's
* variable-name abbreviation to resolve them - confirm the intended variables.
local vars "youngest_threebooks youngest_toys belief_teaching_kids youngest_meals_wfam youngest_convo"
foreach v of local vars {
    replace `v' = 0 if `v' == 2
}


* New fertility: (more children at follow-up than at baseline AND youngest aged
* <= 15 months) OR currently pregnant. Parentheses spell out the & before |
* precedence. Computed before curr_pregnant_e is recoded below.
gen fertility_new = (woman_numkids_e > woman_numkids_b & youngest_age_months_e <= 15) | curr_pregnant_e == 1
replace curr_pregnant_e = 0 if curr_pregnant_e == 2

* newkids is 1 when the youngest child is at most 37 months old, missing otherwise.
gen newkids = 1 if youngest_age_months_e <= 37

* Indicators for each item being ranked 1.
gen values_health = (health_ranking_e == 1)
gen values_food = (food_ranking_e == 1)
gen values_educ = (education_ranking_e == 1)
gen values_security = (security_ranking_e == 1)

* Expected education coded between 11 and 15 inclusive.
gen educ_expect_slc = inrange(child_educ_expect_e, 11, 15)

* Expected occupation indicators by response code (1, 2 and 4).
gen occ_expect_ag = (child_occ_expect_e == 1)
gen occ_expect_ind = (child_occ_expect_e == 2)
gen occ_expect_svc = (child_occ_expect_e == 4)

* Flags baseline weights above 99. Missing weights also satisfy > 99.
gen flag_women_baselineweight = woman_weight_b > 99

/* Drop households that were dropped at baseline */
merge m:1 www hh using "`folder'/Data/baseline_flag_hh.dta", nogen keep(3)
drop if flag_spending == 1 | flag_missingwomen == 1
* NOTE(review): the wildcard removes every variable whose name starts with
* "flag", which includes flag_women_baselineweight created earlier in this
* file - confirm that discarding it is intended.
drop flag*
svyset vdc_code, strata(district)

/* Establish outliers with the flag variable */

/* Replace the flag on HH Assets = 1 if:
   10 or more rooms used by household
   House with 4 or more stories
   Enormous values of Household Land/Farmland (land > 10, farmland > 80)
   Missing values are never flagged. */

gen flag_hh_assets = 0
replace flag_hh_assets = 1 if rooms_b >= 10 & !missing(rooms_b)
replace flag_hh_assets = 1 if stories_b >= 4 & !missing(stories_b)
replace flag_hh_assets = 1 if hh_land_b > 10 & !missing(hh_land_b)
replace flag_hh_assets = 1 if farmland_b > 80 & !missing(farmland_b)

/* Replace the flag on HH Spending = 1 if:
   Monthly spending > Annual Income
   Monthly Spending over 50000
   Monthly Spending = 0
   Annual Income > 500000 Rs
   */
* NOTE(review): no spending flag is computed here; households with
* flag_spending == 1 from baseline_flag_hh.dta were already dropped above.

/* Replace the flag on Calories = 1 if:
   Calories per person exceed 5000 */
* Missing compares as larger than any number in Stata, so missing calories are
* excluded explicitly, matching the treatment of missing in the asset flags.
gen flag_cals = 0
replace flag_cals = 1 if cals_perperson_e > 5000 & !missing(cals_perperson_e)

* Combined outlier flag used when computing control-group statistics below.
gen flag = flag_hh_assets == 1 | flag_cals == 1

/* Create indexes as in Anderson 2008: Knowledge */

* Each knowledge indicator is standardised against the unflagged control-VDC
* observations: subtract their mean and divide by their standard deviation.
local knowledge "exclusive_6mo preg_more diarrhea_more givebaby_firstmilk feed_6t11 snacks_12t24 iron_3t4 deworming_2ndtri vitaminA_45 anc_checkups_4"
foreach var of local knowledge {
    // egen with an if-qualifier fills only the qualifying rows. Both control-group
    // statistics are therefore broadcast to every row with max() before use;
    // without this the z-score is missing for all non-control or flagged rows.
    tempvar ctrl_mean
    egen `ctrl_mean' = mean(`var'_e) if control_vdc == 1 & flag == 0
    egen `var'_m = max(`ctrl_mean')
    // Drop the temporary explicitly so it is not written out by a later -save-.
    drop `ctrl_mean'
    egen `var'_sd_c = sd(`var'_e) if control_vdc == 1 & flag == 0
    egen `var'_sd = max(`var'_sd_c)
    gen `var'_z = (`var'_e - `var'_m) / `var'_sd
}

* Covariance matrix of the ten z-scores.
corrmat exclusive_6mo_z preg_more_z diarrhea_more_z givebaby_firstmilk_z feed_6t11_z snacks_12t24_z iron_3t4_z deworming_2ndtri_z vitaminA_45_z anc_checkups_4_z, covmat(know_cov)

* Index weights: component j is weighted by the j-th column sum of the inverse
* covariance matrix, and the total is scaled by 1 / (1' * inv(cov) * 1).
mat know_cov_inv = inv(know_cov)
mat ones = J(10,1,1)
mat weights = inv(ones'*know_cov_inv*ones)
local weight = weights[1,1]
foreach col of numlist 1/10 {
    local weightvar`col' = know_cov_inv[1,`col']+know_cov_inv[2,`col']+know_cov_inv[3,`col']+know_cov_inv[4,`col'] + know_cov_inv[5,`col']+know_cov_inv[6,`col']+know_cov_inv[7,`col']+know_cov_inv[8,`col']+know_cov_inv[9,`col']+know_cov_inv[10,`col']
}
gen know_index = `weight'*(`weightvar1'*exclusive_6mo_z + `weightvar2'*preg_more_z + `weightvar3'*diarrhea_more_z + `weightvar4'*givebaby_firstmilk_z + `weightvar5'*feed_6t11_z + `weightvar6'*snacks_12t24_z + `weightvar7'*iron_3t4_z + `weightvar8'*deworming_2ndtri_z + `weightvar9'*vitaminA_45_z + `weightvar10'*anc_checkups_4_z)

/* Create indexes as in Anderson 2008: behavior with new children */

/* Indicators are oriented so that 1 is the favourable outcome before standardising */
gen nonightblindness = (youngest_nightblindness_e == 0)

gen threedays_onlybreastmilk = (youngest_3days_nonbreastmilk_e == 0)

* Standardise each component among women with newkids == 1, using the mean and
* standard deviation of unflagged control-VDC observations. The _c variables
* hold the control-only statistics; max() copies them to all newkids rows.
local demean "youngest_antenatal_care_e youngest_deworming_e youngest_vitaminA_e threedays_onlybreastmilk exclusive_breastfed_6mos first_breastfed_0hr youngest_irontab_5t6mos"
foreach comp of local demean {
    egen `comp'_m_c = mean(`comp') if flag == 0 & newkids == 1 & control_vdc == 1
    egen `comp'_sd_c = sd(`comp') if flag == 0 & newkids == 1 & control_vdc == 1
    egen `comp'_m = max(`comp'_m_c) if newkids == 1
    egen `comp'_sd = max(`comp'_sd_c) if newkids == 1
    gen `comp'_z = (`comp' - `comp'_m) / `comp'_sd if newkids == 1
}

* Blank the raw antenatal-care variable where newkids is missing (the z-score
* above is already missing for those rows).
replace youngest_antenatal_care_e = . if newkids == .

* Covariance matrix of the seven z-scores.
corrmat youngest_antenatal_care_e_z youngest_deworming_e_z youngest_vitaminA_e_z threedays_onlybreastmilk_z exclusive_breastfed_6mos_z first_breastfed_0hr_z youngest_irontab_5t6mos_z, covmat(behav_cov_z)

* Weights: column sums of the inverse covariance matrix, scaled by
* 1 / (1' * inv(cov) * 1).
mat behav_cov_inv_z = inv(behav_cov_z)
mat ones_z = J(7,1,1)
mat weights_z = inv(ones_z'*behav_cov_inv_z*ones_z)
local weight_z = weights_z[1,1]
forvalues col = 1/7 {
    local weightvarz`col' = behav_cov_inv_z[1,`col']+behav_cov_inv_z[2,`col']+behav_cov_inv_z[3,`col']+behav_cov_inv_z[4,`col']+behav_cov_inv_z[5,`col']+behav_cov_inv_z[6,`col']+behav_cov_inv_z[7,`col']
}
gen behav_index_z = `weight_z'*(`weightvarz1'*youngest_antenatal_care_e_z ///
    + `weightvarz2'*youngest_deworming_e_z ///
    + `weightvarz3'*youngest_vitaminA_e_z ///
    + `weightvarz4'*threedays_onlybreastmilk_z ///
    + `weightvarz5'*exclusive_breastfed_6mos_z ///
    + `weightvarz6'*first_breastfed_0hr_z ///
    + `weightvarz7'*youngest_irontab_5t6mos_z)

* Number the women within each household. Ordering by idc inside the household
* makes the numbering reproducible; sorting on www hh alone leaves the order of
* women within a household arbitrary from run to run.
* NOTE(review): assumes idc distinguishes women within a household - confirm.
bysort www hh (idc): gen wno = _n

label variable info_vdc "Info Only"
label variable cash_vdc "Info + Cash"

* Write out the regression-ready dataset.
save "`folder'/Data/women_follow_up_reg_data", replace


